##################
# China GFDI MA paper 
# Date: 2/1/2023
# Author: Aycan Katitas 
# Code to Generate Figure Files 
##################

# NOTE(review): hard-coded, machine-specific working directory. Every relative
# "data/..." path in this script is resolved against it, so it is kept as is.
setwd("~/Desktop/Git/chinagfdi")

# Attach the packages whose functions are called unqualified in this script:
# dplyr (%>%, filter, count, left_join, ...), tidyr (separate, pivot_longer,
# pivot_wider) and stringr (str_pad). Without these calls the script only runs
# in a session where the packages happen to be attached already.
library(dplyr)
library(tidyr)
library(stringr)

## FIGURES A2-A3

## PROPRIETARY DATA SOURCE - REFER TO README FILE
# The .Rdata file is expected to provide `usfdifips`, which is used below and
# again in the Figure A4, A7 and A8 sections.
load("data/usfdi.Rdata")

# Yearly totals for rows whose source country is China: number of rows,
# summed value_2010 and summed jobs (missing values ignored in both sums).
# Year 2021 is dropped.
anchina <- usfdifips %>%
  filter(source.country == "China") %>%
  group_by(Year) %>%
  summarise(
    chinano = n(),
    chinaval = sum(value_2010, na.rm = TRUE),
    chinajobs = sum(jobs, na.rm = TRUE)
  ) %>%
  filter(Year != 2021)

## PROPRIETARY DATA SOURCE - REFER TO README FILE
# First M&A extract. The first line of the file is skipped, empty strings are
# read as NA, and the `X` column is dropped.
chinama <- read.csv(
  "data/china20002012.csv",
  skip = 1,
  na.strings = c("", "NA"),
  stringsAsFactors = FALSE
) %>%
  dplyr::select(-X)

## PROPRIETARY DATA SOURCE - REFER TO README FILE
# Second M&A extract, read with the same options.
chinama2 <- read.csv(
  "data/china1321.csv",
  skip = 1,
  na.strings = c("", "NA"),
  stringsAsFactors = FALSE
)

# Stack the two extracts (rbind requires identical column sets).
chinaanma <- rbind(chinama, chinama2)

# Number of announced deals per year. The announcement date is split on "/"
# and the third piece is prefixed with "20" -- assumes two-digit years,
# TODO confirm. Rows with a missing date become "20NA" and are dropped
# together with 2021.
chinaanmaf <- chinaanma %>%
  separate(
    X..Date.Announced,
    into = c("month", "day", "year"),
    sep = "/"
  ) %>%
  mutate(Year = paste0("20", year)) %>%
  count(Year, name = "ma") %>%
  filter(!(Year == "2021" | Year == "20NA")) %>%
  mutate(Year = as.numeric(Year))

# Attach the yearly M&A counts to the yearly totals in `anchina`.
chinaan <- anchina %>%
  left_join(chinaanmaf)

# china bop
# The first five lines of the file are skipped before the header is read.
chbop <- read.csv("data/chinabopposition.csv", skip = 5)

# Keep the row(s) labelled "China" (label whitespace-trimmed before the
# comparison), drop the label column, and stack the X2000..X2020 columns into
# one row per year with a numeric Year.
chbop2 <- chbop %>%
  filter(trimws(X) == "China") %>%
  dplyr::select(-X) %>%
  pivot_longer(
    cols = X2000:X2020,
    names_to = "Year",
    values_to = "bop"
  ) %>%
  mutate(Year = as.numeric(trimws(gsub("X", "", Year))))

# Reshape the joined table to long form with columns group / Year / value.
# The wide-then-long round trip yields rows ordered by series first and year
# second. NOTE(review): the column range chinano_2004:bop_2020 is hard-coded,
# so this presumably expects the joined years to run from 2004 to 2020 --
# confirm if the inputs change.
chinaan2 <- chinaan %>%
  left_join(chbop2) %>%
  pivot_wider(
    names_from = "Year",
    values_from = chinano:bop
  ) %>%
  pivot_longer(
    cols = chinano_2004:bop_2020,
    names_to = "group",
    values_to = "value"
  ) %>%
  separate(group, into = c("group", "Year"), sep = "_") %>%
  mutate(
    # strip the "china" prefix from the series label (e.g. "chinano" -> "no")
    group = gsub("china", "", group),
    value = as.numeric(value),
    Year = as.numeric(Year)
  )

saveRDS(chinaan2, file = "data/chinabopgfdima.Rds")

## FIGURE A4 

# get 2020 congressional district map
library(USAboundaries)
library(sf)
library(leaflet)
library(concordance)

# Congressional district boundaries from USAboundaries, low resolution.
congress_sf <- us_congressional(resolution = "low")

# proportion 2004-2020 gfdi to 2020 CD - GeoCORR Missouri - https://mcdc.missouri.edu/applications/geocorr.html

# County-to-district crosswalk; the first line of the file is skipped.
ctycd <- read.csv("data/countycd116.csv", skip = 1)

# Positional rename: assumes the file has exactly these seven columns in this
# order -- TODO confirm against the GeoCORR export.
names(ctycd) <- c(
  "FIPS", "statefips", "cd116", "stateab", "countryname", "pop20", "afact"
)

# Count of China-sourced rows per county, allocated to districts by
# multiplying each county count by `afact` and summing within district.
# Counties without a crosswalk match end up under the id "NANA", which never
# matches a district in the join below.
chinamap <- usfdifips %>%
  filter(source.country == "China") %>%
  count(FIPS, name = "count") %>%
  # group_by(FIPS) %>%
  # summarise(val=sum(value_2010,na.rm=T)) %>%
  left_join(ctycd, by = "FIPS") %>% # explicit key instead of a natural join
  mutate(
    cdids = paste0(stateab, cd116),
    wcount = count * afact
  ) %>%
  group_by(cdids) %>%
  summarise(count = sum(wcount, na.rm = TRUE))

# Attach the allocated counts to the district polygons. The district id is the
# state abbreviation followed by the district number without leading zeros,
# matching the id built from the crosswalk above. Districts with no match get
# a count of 0; HI, AK and PR are dropped.
cdmap <- congress_sf %>%
  mutate(cdids = paste0(state_abbr, as.numeric(cd116fp))) %>%
  left_join(chinamap, by = "cdids") %>%
  arrange(cdids) %>%
  mutate(count = coalesce(count, 0)) %>%
  filter(
    state_abbr != "HI",
    state_abbr != "AK",
    state_abbr != "PR"
  )

saveRDS(cdmap, file = "data/chinamap.Rds")

## FIGURE A7

## PROPRIETARY DATA SOURCE - REFER TO README FILE
crossnaics <- read.csv("data/fdimarketsnaics.csv")

# Two-digit NAICS codes with their display names; the code is forced numeric
# so it can be joined against the numeric naics2 built below.
indnames <- read.csv("data/naics2names.csv") %>%
  mutate(naics2 = as.numeric(naics2))

# Crosswalk with sector / subsector labels normalised to lower case with
# surrounding whitespace removed.
cnaics <- crossnaics %>%
  dplyr::select(-Sector_id, -X) %>%
  mutate(
    ind.sector = tolower(trimws(ind.sector)),
    subsector = tolower(trimws(subsector))
  )


# China-sourced rows dated before 2020-09-01, with sector / subsector labels
# normalised and then rewritten to the spellings the crosswalk join expects.
# NOTE(review): "motorcyle" and "accomodation" look like deliberate
# misspellings that presumably mirror the crosswalk file -- do not correct
# them without checking that file.
industrychina <- usfdifips %>%
  filter(Date < "2020-09-01", source.country == "China") %>%
  arrange(Date) %>%
  mutate(
    ind.sector = tolower(trimws(ind.sector)),
    subsector = tolower(trimws(subsector))
  ) %>%
  mutate(
    subsector = case_when(
      subsector == "motorcycle, bicycle, & parts" ~
        "motorcyle, bicycle, & parts",
      subsector == "clothing & clothing accessories" ~
        "clothing & clothing accessories stores",
      subsector %in% c(
        "furniture, homeware & related products (wood products)",
        "furniture, homeware & related products (consumer products)",
        "furniture, homeware & related products (textiles)"
      ) ~ "furniture, homeware & related products",
      subsector == "accommodation" ~
        "accomodation",
      subsector == "motor vehicle & parts dealers (automotive components)" ~
        "motor vehicle & parts dealers",
      subsector == "audio & video equipment (electronic components)" ~
        "audio & video equipment",
      TRUE ~ subsector
    ),
    ind.sector = case_when(
      ind.sector == "industrial equipment" ~
        "industrial machinery, equipment & tools",
      ind.sector == "warehousing" ~
        "warehousing & storage",
      ind.sector == "coal, oil & gas" ~
        "coal, oil and natural gas",
      ind.sector == "food & beverages" ~
        "food & tobacco",
      ind.sector == "transportation & warehousing" ~
        "transportation",
      ind.sector == "building materials" ~
        "building & construction materials",
      ind.sector == "renewable energy" ~
        "alternative/renewable energy",
      TRUE ~ ind.sector
    )
  ) %>%
  left_join(cnaics) %>%
  dplyr::select(Date, investing.comp, ind.sector, subsector, naics) %>%
  mutate(
    # first two digits of the NAICS code
    naics2 = as.numeric(substr(as.character(naics), 1, 2)),
    # fold 32/33 into 31, 45 into 44 and 49 into 48
    naics2 = case_when(
      naics2 %in% c(32, 33) ~ 31,
      naics2 == 45 ~ 44,
      naics2 == 49 ~ 48,
      TRUE ~ naics2
    )
  ) %>%
  left_join(indnames) # 21 obs that didn't match with the industry names


# sic naics crosswalk - dont match 1 to 1
# Keep exactly one NAICS6 per SIC4: the row with the largest Emp_weight.
# `with_ties = FALSE` guarantees a single row even when several NAICS codes
# share the maximum weight; keeping all tied rows would duplicate deals in the
# later left_join on target.sic and inflate the industry counts. Rows with a
# missing Emp_weight are only chosen when a SIC4 has no non-missing weight.
# The result is ungrouped so no grouping leaks into downstream steps.
sicnaics <- read.csv("data/SIC4_to_NAICS6.csv") %>%
  group_by(SIC4) %>%
  slice_max(Emp_weight, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  rename(
    target.sic = SIC4,
    naics = NAICS6
  ) %>%
  dplyr::select(target.sic, naics)

# Note: this reassigns `chinama`, replacing the table read in the
# Figures A2-A3 section.
chinama <- read.csv("data/chinama.csv")

# Hand-assigned NAICS codes for specific SIC codes (name = SIC4, value =
# NAICS6). For these SICs the value here replaces whatever the crosswalk join
# produced.
sic_naics_override <- c(
  "1011" = 212210,
  "1021" = 212230,
  "1041" = 212221,
  "1044" = 212222,
  "1061" = 212230,
  "1094" = 212291,
  "1221" = 212111,
  "1241" = 213113,
  "1381" = 213111,
  "1382" = 213112,
  "1389" = 213112,
  "1522" = 236118,
  "1711" = 238210,
  "1781" = 237110,
  "2011" = 311611,
  "2013" = 311612,
  "2023" = 311511,
  "4922" = 486210,
  "4953" = 562920,
  "6029" = 522110,
  "6726" = 525990,
  "8051" = 623110,
  "8211" = 611110,
  "8221" = 611310,
  "9511" = 924110,
  "1311" = 211120
)

# M&A deals with a usable target SIC code, mapped to NAICS and to the
# two-digit industry names.
chinamaind <- chinama %>%
  # characters 7-8 of dateann are read as a two-digit year -- assumes a
  # fixed-width date such as mm/dd/yy, TODO confirm
  mutate(year = as.numeric(substr(dateann, 7, 8))) %>%
  filter(year > 3, year < 21) %>%
  dplyr::select(dateann, target.name, target.sic, target.state) %>%
  mutate(target.sic = as.numeric(target.sic)) %>%
  filter(!is.na(target.sic)) %>%
  left_join(sicnaics) %>%
  mutate(
    naics = ifelse(
      target.sic %in% as.numeric(names(sic_naics_override)),
      unname(sic_naics_override)[
        match(target.sic, as.numeric(names(sic_naics_override)))
      ],
      naics
    )
  ) %>%
  mutate(
    # first two digits of the NAICS code
    naics2 = as.numeric(substr(as.character(naics), 1, 2)),
    # fold 32/33 into 31, 45 into 44 and 49 into 48
    naics2 = case_when(
      naics2 %in% c(32, 33) ~ 31,
      naics2 == 45 ~ 44,
      naics2 == 49 ~ 48,
      TRUE ~ naics2
    )
  ) %>%
  left_join(indnames)

# Share of rows in each two-digit industry, computed separately for the
# greenfield table (`industrychina`, group "green") and the M&A table
# (`chinamaind`, group "ma"). `allcount` is the total row count of the
# respective table, so `per` sums to 1 within each group.
indgren <- industrychina %>%
  add_count(name = "allcount") %>%
  count(naics2, indname, allcount, name = "count") %>%
  mutate(
    per = count / allcount,
    group = "green"
  )

indma <- chinamaind %>%
  add_count(name = "allcount") %>%
  count(naics2, indname, allcount, name = "count") %>%
  mutate(
    per = count / allcount,
    group = "ma"
  )

# Stack the M&A rows on top of the greenfield rows and save.
indgraph <- rbind(indma, indgren)

saveRDS(indgraph, "data/chinamagfdiind.Rds")

## FIGURE A8

## CFIUS coding of critical industries - 2009 CFIUS report

# Three-digit NAICS codes treated as sensitive.
natsec <- c(
  314, 324, 325, 326, 327, 331, 332, 333, 334, 335, 336, 339,
  511, 517, 518, 519, 523, 525, 541, 561, 562,
  211, 212, 213, 221, 236, 237, 238,
  423, 424, 443, 483, 484, 485, 488, 493
)

# Share of M&A deals whose three-digit NAICS prefix is in `natsec`.
# Inside mutate() the right-hand `natsec` is the vector defined above (no
# column of that name exists yet); the new column then reuses the name.
# Rows with a missing NAICS code are counted as "not sensitive".
indma2 <- chinamaind %>%
  dplyr::select(dateann, target.name, target.state, naics, indname) %>%
  mutate(
    naics3 = substr(as.character(naics), 1, 3),
    natsec = ifelse(naics3 %in% natsec, "sensitive", "not sensitive")
  ) %>%
  add_count(name = "total") %>%
  count(total, natsec, name = "count") %>%
  mutate(
    per = count / total,
    group = "ma"
  )


# Same share for the greenfield table. Here the prefix is converted to a
# number and right-padded with "0" to three characters before the lookup.
indgren2 <- industrychina %>%
  mutate(
    naics3 = str_pad(
      as.numeric(substr(as.character(naics), 1, 3)),
      width = 3,
      side = "right",
      pad = "0"
    ),
    natsec = ifelse(naics3 %in% natsec, "sensitive", "not sensitive")
  ) %>%
  add_count(name = "total") %>%
  count(total, natsec, name = "count") %>%
  mutate(
    per = count / total,
    group = "green"
  )

# Stack the M&A rows on top of the greenfield rows and save.
indgraph2 <- rbind(indma2, indgren2)

saveRDS(indgraph2, "data/chinaindgraphnatsec.Rds")
